Source of Data

More data
- US 1993 life table for the same age group
us.93 <- data.frame(x = graunt$x, lx.93 = c(100, 99, 99, 98, 97, 95, 92, 84, 70))
Into one single data frame
- Combine two data frames into one single data frame
(graunt.us <- data.frame(graunt, lx.93 = us.93$lx))
## x lx.17th lx.93
## 1 0 100 100
## 2 6 64 99
## 3 16 40 99
## 4 26 25 98
## 5 36 16 97
## 6 46 10 95
## 7 56 6 92
## 8 66 3 84
## 9 76 1 70
Life Expectancy
- The basic principle is that the area under the survival function is the life expectancy.
- \(X \ge 0\), \(X \sim F(x)\) => \(X \equiv F^{-1}(U), U \sim U(0,1)\). therefore,
- \(E(X) = E\{F^{-1}(U)\} = \int_{0}^{1} F^{-1}(u)du = \int_0^{\infty} 1-F(x) dx = \int_{0}^{\infty} S(x) dx\)
Step by step approach to draw survival curve
1. Basic plot with points and lines, compare the following threes methods
par(mfrow = c(2, 2))
plot(graunt)
plot(lx.17th ~ x, data = graunt)
plot(graunt)
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")

2. Denote the ages and observed survival rates on the axes
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)

3. Denote the age 0 and 76 by dotted lines
plot(graunt, ann=F, xaxt="n", yaxt="n", type = "b")
axis(side = 1, at=graunt$x, labels=graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)

Setting up coordinates for polygon() (Clockwise)
(graunt.x <- c(graunt$x, 0))
## [1] 0 6 16 26 36 46 56 66 76 0
(graunt.y <- c(graunt$lx.17th, 0))
## [1] 100 64 40 25 16 10 6 3 1 0
4. Shading
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15, angle = 135)

5. Grids
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15)
abline(v = graunt$x, lty = 2)

6. Title, x-axis label, and y-axis label
par(family = "MS Gothic")
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15)
abline(v = graunt$x, lty = 2)
main.title <- "Graunt's Life Distribution"
x.lab <- "Age (years)"
y.lab <- "Survival Rates (%)"
title(main = main.title, xlab = x.lab, ylab = y.lab)

Area under the curve
- The area under the curve can be approximated by the sum of the areas of trapezoids, therefore the area is
- \(\sum_{i=1}^{n-1} (x_{i+1}-x_i)\times\frac{1}{2}(y_i + y_{i+1})\).
diff(), head(), and tail() can be used to write a function to compute the area easily.
area.R <- function(x, y) {
sum(diff(x) * (head(y, -1) + tail(y, -1))/2)
}
area.R(graunt$x, graunt$lx.17th)/100
## [1] 18.17
Comparison with US 1993 life table
- The shaded area between the survival functions of Graunt’s and US 1993 represents the difference of life expectancies.
- Draw Graunt’s first with axes, lower and upper limits
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v=c(0, 76), lty = 2)
abline(v = c(0, 76), lty = 2)

2. Add US 1993 survival function
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")

3. Actually, US 1993 life table is truncated at the age 76. Specify that point.
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)

4. Using `las = 1` to specify 70%.
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)

Setting coordinates for polygon()
us.graunt.x <- c(us.93$x, rev(graunt$x))
us.graunt.y <- c(us.93$lx.93, rev(graunt$lx.17th))
5. Shading
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)

6. Grids
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)
abline(v = graunt$x, lty = 2)

7. Title, x-axis and y-axis labels
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)
abline(v = graunt$x, lty = 2)
main.title.g.us <- "Survival Function of Graunt and US 1993"
title(main = main.title.g.us, xlab = x.lab, ylab = y.lab)

dev.copy(device = png, file = "../pics/graunt_us93.png")
## quartz_off_screen
## 3
Life expectancy
- The area under the US 1993 survival function is
area.R(us.93$x, us.93$lx.93)/100
## [1] 70.92
- The area of shaded region is
area.R(us.93$x, us.93$lx.93)/100 - area.R(graunt$x, graunt$lx.17th)/100
## [1] 52.75
Comparison with Halley’s life table
Halley’s life table
age <- 0:84
lx <- c(1238, 1000, 855, 798, 760, 732, 710, 692, 680, 670, 661, 653, 646, 640, 634, 628, 622, 616, 610, 604, 598, 592, 586, 579, 573, 567, 560, 553, 546, 539, 531, 523, 515, 507, 499, 490, 481, 472, 463, 454, 445, 436, 427, 417, 407, 397, 387, 377, 367, 357, 346, 335, 324, 313, 302, 292, 282, 272, 262, 252, 242, 232, 222, 212, 202, 192, 182, 172, 162, 152, 142, 131, 120, 109, 98, 88, 78, 68, 58, 50, 41, 34, 28, 23, 20)
length(lx)
## [1] 85
halley <- data.frame(age, lx)
halley$px <- round(halley$lx/1238*100, digits = 1)
halley
## age lx px
## 1 0 1238 100.0
## 2 1 1000 80.8
## 3 2 855 69.1
## 4 3 798 64.5
## 5 4 760 61.4
## 6 5 732 59.1
## 7 6 710 57.4
## 8 7 692 55.9
## 9 8 680 54.9
## 10 9 670 54.1
## 11 10 661 53.4
## 12 11 653 52.7
## 13 12 646 52.2
## 14 13 640 51.7
## 15 14 634 51.2
## 16 15 628 50.7
## 17 16 622 50.2
## 18 17 616 49.8
## 19 18 610 49.3
## 20 19 604 48.8
## 21 20 598 48.3
## 22 21 592 47.8
## 23 22 586 47.3
## 24 23 579 46.8
## 25 24 573 46.3
## 26 25 567 45.8
## 27 26 560 45.2
## 28 27 553 44.7
## 29 28 546 44.1
## 30 29 539 43.5
## 31 30 531 42.9
## 32 31 523 42.2
## 33 32 515 41.6
## 34 33 507 41.0
## 35 34 499 40.3
## 36 35 490 39.6
## 37 36 481 38.9
## 38 37 472 38.1
## 39 38 463 37.4
## 40 39 454 36.7
## 41 40 445 35.9
## 42 41 436 35.2
## 43 42 427 34.5
## 44 43 417 33.7
## 45 44 407 32.9
## 46 45 397 32.1
## 47 46 387 31.3
## 48 47 377 30.5
## 49 48 367 29.6
## 50 49 357 28.8
## 51 50 346 27.9
## 52 51 335 27.1
## 53 52 324 26.2
## 54 53 313 25.3
## 55 54 302 24.4
## 56 55 292 23.6
## 57 56 282 22.8
## 58 57 272 22.0
## 59 58 262 21.2
## 60 59 252 20.4
## 61 60 242 19.5
## 62 61 232 18.7
## 63 62 222 17.9
## 64 63 212 17.1
## 65 64 202 16.3
## 66 65 192 15.5
## 67 66 182 14.7
## 68 67 172 13.9
## 69 68 162 13.1
## 70 69 152 12.3
## 71 70 142 11.5
## 72 71 131 10.6
## 73 72 120 9.7
## 74 73 109 8.8
## 75 74 98 7.9
## 76 75 88 7.1
## 77 76 78 6.3
## 78 77 68 5.5
## 79 78 58 4.7
## 80 79 50 4.0
## 81 80 41 3.3
## 82 81 34 2.7
## 83 82 28 2.3
## 84 83 23 1.9
## 85 84 20 1.6
R base graphics
- To make the comparison easy, plot the points at the same age group of Graunt’s, 0, 6, 16, 26, 36, 46, 56, 66, 76. Step by step approach
- Halley’s survival function first
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")

2. Mark the points at 0, 6, 16, 26, 36, 46, 56, 66, 76 on Halley's survival function.
age.graunt <- age %in% graunt$x
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px[age.graunt] ~ age[age.graunt], data = halley)

Using subset()
halley.graunt <- subset(halley, age.graunt)
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)

3. Add Graunt's survival function
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")

4. x-axis label and y-axis label with `las = 1`
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)

5. Vertical dotted lines at the ages 0, 76, and 84
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)

6. Specify the developers at proper coordinates with `text()`
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))

6. Main title, x-axis label, and y-axis label
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
main.title.2 <- "Survival Function of Graunt and Halley"
title(main = main.title.2, xlab = x.lab, ylab = y.lab)

Polygon
- Setting the coordinates for
polygon(). The intersection is found at x = 10.8, y = 52.8 with locator(1) and couple of trial and errors
poly.1.x <- c(graunt$x[1:2], 10.8, halley$age[11:1])
poly.1.y <- c(graunt$lx.17th[1:2], 52.8, halley$px[11:1])
* Lower region
poly.2.x <- c(10.8, halley$age[12:85], graunt$x[9:3])
poly.2.y <- c(52.8, halley$px[12:85], graunt$lx.17th[9:3])
7. Shading upper region first
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v=c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
title(main = main.title.2, xlab = x.lab, ylab = y.lab)
polygon(poly.1.x, poly.1.y, angle = 45, density = 15, col = "blue")

8. Shading lower region next
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v=c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
title(main = main.title.2, xlab = x.lab, ylab = y.lab)
polygon(poly.1.x, poly.1.y, angle = 45, density = 15, col = "blue")
polygon(poly.2.x, poly.2.y, angle = 45, density = 15, col = "red")

dev.copy(device = png, file = "../pics/graunt_halley.png")
## quartz_off_screen
## 4
Life expectancy
- Compute the difference of life expectancies
(life.exp.halley <- area.R(halley$age, halley$px)/100)
## [1] 27.872
(life.exp.graunt <- area.R(graunt$x, graunt$lx.17th)/100)
## [1] 18.17
ggplot
library(ggplot2)
Data Reshape
- Attach
reshape2 package to change wide format to long format
library(reshape2)
graunt.us.melt <- melt(graunt.us, id.vars = "x", measure.vars = c("lx.17th", "lx.93"), value.name = "lx", variable.name = "times")
graunt.us.melt
## x times lx
## 1 0 lx.17th 100
## 2 6 lx.17th 64
## 3 16 lx.17th 40
## 4 26 lx.17th 25
## 5 36 lx.17th 16
## 6 46 lx.17th 10
## 7 56 lx.17th 6
## 8 66 lx.17th 3
## 9 76 lx.17th 1
## 10 0 lx.93 100
## 11 6 lx.93 99
## 12 16 lx.93 99
## 13 26 lx.93 98
## 14 36 lx.93 97
## 15 46 lx.93 95
## 16 56 lx.93 92
## 17 66 lx.93 84
## 18 76 lx.93 70
str(graunt.us.melt)
## 'data.frame': 18 obs. of 3 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ times: Factor w/ 2 levels "lx.17th","lx.93": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
- Change factor levels of
times
levels(graunt.us.melt$times) <- c("17th", "1993")
str(graunt.us.melt)
## 'data.frame': 18 obs. of 3 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ times: Factor w/ 2 levels "17th","1993": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
Plot
Points and Lines
- Step by step approach to understand the grammar of ggplot
- We set
ggplot() to accept varying data.frame() and aes()in geom_polygon
g1 <- ggplot() + geom_point(data = graunt.us.melt, aes(x = x, y = lx, colour = times))
g1

g2 <- g1 + geom_line(data = graunt.us.melt, aes(x = x, y = lx, colour = times))
g2

g3 <- g2 + theme_bw()
g3

Polygon
- Reuse
us.graunt.x and us.graunt.y for polygon(). Note how to remove default legends.
p3 <- g3 + geom_polygon(data = data.frame(x = us.graunt.x, y = us.graunt.y), aes(x = x, y = y), alpha = 0.3, fill = "red")
p3

p4 <- p3 + guides(colour = "none")
p4

Change default annotations
Points and Lines
1. Change the x-axis and y-axis labels
(g4 <- g3 + xlab(x.lab) + ylab(y.lab))

2. Main title
(g4 <- g3 + xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us))

3. Change legend title
(g4 <- g3 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us) +
labs(colour = "Era"))

4. Change legends.
(g4 <- g3 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us) +
labs(colour = "Era") +
scale_colour_discrete(labels = c("Graunt Era", "US 1993")))

- Place legends inside the plot
(g5 <- g4 + theme(legend.position = c(0.8, 0.5)))

- Change x-axis and y-axis tick marks
(g6 <- g5 + scale_x_continuous(breaks = graunt$x) + scale_y_continuous(breaks = graunt$lx.17th))

ggsave("../pics/graunt_us_plot.png", g6)
## Saving 6 x 6 in image
Polygon
- Add information to the plot drawn with
polygon()
- Start with p4
p4

2. Main title, x-axis and y-axis labels
(p5 <- p4 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us))

3. "Graunt Era", "US 1993", "Difference of Life Expectancies" at the proper position
(p6 <- p5 + annotate("text", x = c(20, 40, 70), y = c(20, 60, 90), label = c("Graunt Era", "Difference of\nLife Expectancies", "US 1993")))

4. x-axis and y-axis tick marks
(p7 <- p6 + scale_x_continuous(breaks = graunt$x) + scale_y_continuous(breaks = graunt$lx.17th))

ggsave("../pics/graunt_us_poly.png", p7)
## Saving 6 x 6 in image
Graunt and Halley
Data Reshaping
- Since the observed ages are different, we need to final structure of the data frame to be melted. So, create copies of
graunt and halley and extract parts of what we need and give feasible names.
graunt.2 <- graunt
names(graunt.2) <- c("x", "Graunt")
halley.2 <- halley[-2]
names(halley.2) <- c("x", "Halley")
graunt.halley.melt <- melt(list(graunt.2, halley.2), id.vars = "x", value.name = "lx", variable.name = "Who")
str(graunt.halley.melt)
## 'data.frame': 94 obs. of 4 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ Who: Factor w/ 2 levels "Graunt","Halley": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
## $ L1 : int 1 1 1 1 1 1 1 1 1 2 ...
graunt.halley.melt.g <- subset(graunt.halley.melt, graunt.halley.melt$x %in% graunt$x)
head(graunt.halley.melt.g, n = 10)
## x Who lx L1
## 1 0 Graunt 100 1
## 2 6 Graunt 64 1
## 3 16 Graunt 40 1
## 4 26 Graunt 25 1
## 5 36 Graunt 16 1
## 6 46 Graunt 10 1
## 7 56 Graunt 6 1
## 8 66 Graunt 3 1
## 9 76 Graunt 1 1
## 10 0 Halley 100 2
Survival Function, Step by Step
gh1 <- ggplot() + geom_line(data = graunt.halley.melt, aes(x = x, y = lx, colour = Who))
gh1

gh2 <- gh1 + geom_point(data = graunt.halley.melt.g, aes(x = x, y = lx, colour = Who))
gh2

gh3 <- gh2 +
theme_bw() +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.2)
gh3

gh4 <- gh3 +
theme(legend.position = c(0.8, 0.8)) +
annotate("text", x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley")) +
scale_x_continuous(breaks = c(graunt$x, 84)) + scale_y_continuous(breaks = c(graunt$lx.17th, halley$px[halley$age == 6]))
gh4

ggsave("../pics/graunt_halley_ggplot.png", gh4)
## Saving 6 x 6 in image
Polygon
ghp4 <- gh4 + geom_polygon(data = data.frame(x = poly.1.x, y = poly.1.y), aes(x = x, y = y), alpha = 0.3, fill = "blue")
ghp4

ghp5 <- ghp4 + geom_polygon(data = data.frame(x = poly.2.x, y = poly.2.y), aes(x = x, y = y), alpha = 0.3, fill = "red")
ghp5

ggsave("../pics/graunt_halley_poly_ggplot.png", ghp5)
## Saving 6 x 6 in image
dump() and source()
- Check out how to save and retrieve. Use
source() and load() for retrieval.
dump("area.R", file = "area.R")
save.image("graunt_halley_160329.rda")